In [1]:
import os

## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')

import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
/hpc/group/pbenfeylab/ch416/miniconda3/envs/genesys/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
In [2]:
## Conda Env pytorch-gpu on DCC
print(torch.__version__)
print(sc.__version__) 
1.11.0
1.9.6
In [3]:
## Genes considered/used (shared among samples) 
gene_list = pd.read_csv('./gene_list_1108.csv')

Load Data¶

In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
    data = pickle.load(file_handle)
    
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
                         batch_size = batch_size,
                         shuffle = True, drop_last=True)
In [5]:
input_size = data['X_train'].shape[1]
## 10 cell types 
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"

Load trained GeneSys model¶

In [6]:
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"/workstation/genesys_model_trained_on_root_atlas_20240308_continue4.pth", map_location=torch.device('cpu')))
model = model
model.eval()
Out[6]:
ClassifierLSTM(
  (fc1): Sequential(
    (0): Linear(in_features=17513, out_features=256, bias=True)
    (1): Dropout(p=0.2, inplace=False)
    (2): GaussianNoise()
  )
  (fc): Sequential(
    (0): ReLU()
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=10, bias=True)
  )
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (b_to_z): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (bz2_infer_z1): DBlock(
    (fc1): Linear(in_features=1024, out_features=256, bias=True)
    (fc2): Linear(in_features=1024, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z1_to_z2): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z_to_x): Decoder(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=17513, bias=True)
  )
)
In [7]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [8]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [9]:
## GRN for the transition t5 to t7
for ct in cts:
    print(ct)
    cws = np.zeros((len(loader), 17513, 17513))
    with torch.no_grad():
        for i, sample in enumerate(loader):
            x = sample['x'].to(device)
            y = sample['y'].to(device)
            y_label = [num2class[i] for i in y.tolist()]
            
            pred_h = model.init_hidden(batch_size)
            tfrom = model.generate_next(x, pred_h, 4).to('cpu').detach().numpy()
            cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
            
            pred_h = model.init_hidden(batch_size)
            tto = model.generate_next(x, pred_h, 6).to('cpu').detach().numpy()   
            cto = tto[np.where(np.array(y_label)==ct)[0],:]
            
            cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
            cws[i] = cw
    
    ## Calculate mean across number of repeats
    cwm = np.mean(cws, axis=0)
    ctw[cts.index(ct)] = cwm
Atrichoblast
Trichoblast
Cortex
Endodermis
Pericycle
Procambium
Xylem
Phloem
Lateral Root Cap
Columella
In [10]:
# Save the array to disk
np.save('genesys_ctw_t5-t7.npy', ctw)
In [11]:
ctw = np.load('genesys_ctw_t5-t7.npy')
In [12]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
    ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [13]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
    ctw_f[i] = np.abs(ctw_z[i]) > threshold

Load TFs list¶

In [14]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [16]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
Name
NAC001    1
PRE5      1
MYB118    1
MYB21     1
MYB0      1
Name: count, dtype: int64

Network analysis¶

In [17]:
TFidx = []
for i in wanted_TFs['GeneID']:
    if i in gene_list['features'].tolist():
        TFidx.append(np.where(gene_list['features']==i)[0][0])

TFidx = np.sort(np.array(TFidx))
In [19]:
def network(i):
    ## No weights
    adj_nw = ctw_f[i]
    ## Weighted
    adj = ctw[i]*ctw_f[i]
    ## TF only
    adj = adj[np.ix_(TFidx,TFidx)]
    adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
    
    ## Remove no connect 
    regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
    taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
    ## Reciprocol
    keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
    #keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
    
    TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
    ## TF name to keep
    TFname = []
    for i in np.array(gene_list['features'][TFidx])[keepidx]:
        TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
        
    adj = adj[np.ix_(keepidx,keepidx)]
    
    # Create a NetworkX graph for non-directed edges
    G = nx.Graph()  # supports directed edges and allows for multiple edges between the same pair of nodes
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with weights
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(j, i, weight=abs(weight), distance=1/abs(weight))
            

    ## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
    closeness_centrality = nx.closeness_centrality(G, distance='distance')
    ## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
    eigenvector_centrality = nx.eigenvector_centrality(G)
    
    # Create a NetworkX graph for diected edges
    G = nx.MultiDiGraph()  # supports directed edges and allows for multiple edges between the same pair of nodes
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with weights
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(j, i, weight=weight)
    
    ## Measures the number of connections (edges) each node has
    degree_centrality = nx.degree_centrality(G)
    # Calculate outgoing centrality
    out_centrality = nx.out_degree_centrality(G)
    # Calculate incoming centrality
    in_centrality = nx.in_degree_centrality(G)
    ## Measures the extent to which a node lies on the shortest paths between other nodes.
    betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
    
    ## Non_Reciprocal Out centrality
    # Visualize the graph
    pos = nx.spring_layout(G)  # Positions of the nodes
    
    # Node colors based on weighted betweenness centrality
    node_colors = [out_centrality[node] for node in G.nodes()]
    
    # Node sizes based on weighted betweenness centrality
    node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]

    # Get the edge weights as a dictionary
    edge_weights = nx.get_edge_attributes(G, 'weight')
    edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
    
    # Scale the edge weights to desired linewidths
    max_weight = max(edge_weights.values())
    edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
    
    # Draw the graph
    nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
    # Add node labels
    labels = {node: G.nodes[node]['name'] for node in G.nodes}
    nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
    
    # Add a colorbar to show the weighted betweenness centrality color mapping
    sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
    sm.set_array([])
    plt.colorbar(sm)
    
    # Show the plot
    plt.show()
    
    dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
    oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
    ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
    bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
    cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
    ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
    df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
    df.index =TFname
    df = df.sort_values('betweenness_centrality', ascending=False)
    
    return(df)
In [20]:
atri = network(0)
No description has been provided for this image
In [21]:
tri = network(1)
No description has been provided for this image
In [22]:
cor = network(2)
No description has been provided for this image
In [23]:
end = network(3)
No description has been provided for this image
In [24]:
per = network(4)
No description has been provided for this image
In [25]:
pro = network(5)
No description has been provided for this image
In [26]:
xyl = network(6)
No description has been provided for this image
In [27]:
phl = network(7)
No description has been provided for this image
In [28]:
lrc = network(8)
No description has been provided for this image
In [29]:
col = network(9)
No description has been provided for this image
In [30]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [44]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts())
tf_occurance = tf_occurance.rename(columns={
    'count': 'tf_occurance'
})
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [45]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[45]:
atri_betweenness_centrality tri_betweenness_centrality lrc_betweenness_centrality atri_out_centrality tri_out_centrality lrc_out_centrality atri_in_centrality tri_in_centrality lrc_in_centrality centrality_count centrality_sum
ARR6 0.747284 0.547855 0.000019 0.495413 0.146479 0.054432 0.204893 0.050704 0.043546 9 11.290626
In [46]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[46]:
atri_betweenness_centrality tri_betweenness_centrality atri_out_centrality tri_out_centrality atri_in_centrality tri_in_centrality centrality_count centrality_sum
ARR5 0.886747 0.826697 0.513761 0.366197 0.351682 0.115493 6 9.060577
LRL3 0.003058 0.897422 0.003058 0.909859 0.189602 0.242254 6 8.245253
AT3G05860 0.960188 0.373582 0.278287 0.047887 0.321101 0.098592 6 8.079637
WRKY61 0.007120 0.453028 0.370031 0.388732 0.103976 0.036620 6 7.359506
AT2G37120 0.245933 0.174799 0.030581 0.295775 0.510703 0.090141 6 7.347932
HB17 0.754826 0.000517 0.314985 0.028169 0.085627 0.025352 6 7.209476
ZFHD1 0.023245 0.000517 0.055046 0.016901 0.440367 0.121127 6 6.657204
In [47]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[47]:
atri_betweenness_centrality atri_out_centrality atri_in_centrality centrality_count centrality_sum
MC2 0.929176 0.131498 0.152905 3 4.213579
TTG2 0.906371 0.235474 0.189602 3 4.331448
NAC6 0.899280 0.406728 0.159021 3 4.465029
HB24 0.895330 0.192661 0.107034 3 4.195024
GL2 0.760577 0.149847 0.226300 3 4.136724
RMR1 0.522701 0.180428 0.015291 3 3.718420
PAT1 0.380199 0.021407 0.006116 3 3.407722
AT3G13840 0.364036 0.015291 0.137615 3 3.516942
AT2G28710 0.352142 0.382263 0.137615 3 3.872019
OFP18 0.280483 0.125382 0.244648 3 3.650513
HB30 0.202370 0.027523 0.079511 3 3.309403
ARF17 0.127924 0.033639 0.039755 3 3.201319
TGA3 0.041904 0.051988 0.021407 3 3.115298
AIP2 0.030938 0.296636 0.061162 3 3.388736
AT5G22890 0.026679 0.275229 0.033639 3 3.335547
KAN 0.015178 0.137615 0.079511 3 3.232303
PHE1 0.010797 0.051988 0.003058 3 3.065843
AT5G58900 0.010638 0.113150 0.024465 3 3.148252
GATA17 0.008264 0.021407 0.048930 3 3.078601
FIT 0.006107 0.082569 0.217125 3 3.305801
AT2G18670 0.002730 0.125382 0.079511 3 3.207623
AT1G21580 0.000535 0.103976 0.030581 3 3.135091
NLP7 0.000478 0.067278 0.051988 3 3.119744
BZO2H3 0.000394 0.085627 0.082569 3 3.168590
MBD1 0.000328 0.073394 0.036697 3 3.110420
HSFB3 0.000188 0.088685 0.030581 3 3.119454
AT1G25550 0.000019 0.100917 0.042813 3 3.143750
NLP4 0.000009 0.100917 0.012232 3 3.113159
AT4G22820 0.000009 0.061162 0.018349 3 3.079520
WRKY47 0.000009 0.146789 0.036697 3 3.183496
KNAT5 0.000009 0.085627 0.042813 3 3.128450
In [48]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[48]:
tri_betweenness_centrality tri_out_centrality tri_in_centrality centrality_count centrality_sum
AT4G09100 0.936047 0.828169 0.377465 3 5.141681
AT3G53370 0.921238 0.526761 0.121127 3 4.569125
RSL4 0.878292 0.535211 0.067606 3 4.481109
AT5G06800 0.863786 0.259155 0.149296 3 4.272237
RSL2 0.821302 0.614085 0.078873 3 4.514260
RHD6 0.424899 0.645070 0.076056 3 4.146025
AT5G56200 0.396873 0.095775 0.016901 3 3.509549
OFP13 0.390563 0.005634 0.070423 3 3.466619
RAP2.11 0.007862 0.132394 0.014085 3 3.154341
EIL2 0.002101 0.008451 0.036620 3 3.047171
AT5G65130 0.000358 0.005634 0.019718 3 3.025710
AT2G05160 0.000215 0.292958 0.019718 3 3.312891
AT4G39160 0.000024 0.214085 0.016901 3 3.231010
AT2G20030 0.000024 0.019718 0.030986 3 3.050728
HB16 0.000008 0.056338 0.205634 3 3.261980
AT4G01350 0.000008 0.005634 0.028169 3 3.033811
In [49]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[49]:
lrc_betweenness_centrality lrc_out_centrality lrc_in_centrality centrality_count centrality_sum
CRF2 0.982212 0.113530 0.088647 3 4.184389
GATA2 0.946776 0.594090 0.553655 3 5.094521
WRKY17 0.940200 0.261275 0.367030 3 4.568504
WER 0.927465 0.152411 0.281493 3 4.361368
ERF9 0.852882 0.026439 0.085537 3 3.964858
NAC016 0.843515 0.060653 0.143079 3 4.047247
PLT1 0.841465 0.186625 0.062208 3 4.090299
OFP6 0.833140 0.506998 0.188180 3 4.528318
AT1G74840 0.662716 0.202177 0.090202 3 3.955095
HMGB4 0.492863 0.032659 0.063764 3 3.589286
BZIP34 0.424759 0.172628 0.063764 3 3.661151
LBD4 0.307592 0.040435 0.111975 3 3.460003
AT1G69030 0.307578 0.195956 0.079316 3 3.582850
RBR1 0.170933 0.076205 0.041991 3 3.289129
NAC060 0.067661 0.083981 0.035770 3 3.187412
AT5G18090 0.066297 0.110420 0.063764 3 3.240481
ATS 0.066007 0.267496 0.043546 3 3.377049
PRR7 0.034161 0.073095 0.027994 3 3.135250
3xHMG-box2 0.003110 0.004666 0.309487 3 3.317263
BRM 0.002919 0.020218 0.043546 3 3.066683
CHR38 0.001359 0.012442 0.181960 3 3.195760
ZF1 0.000732 0.082426 0.021773 3 3.104931
AT3G52250 0.000664 0.031104 0.017107 3 3.048875
GRF3 0.000497 0.087092 0.013997 3 3.101585
AT1G11950 0.000177 0.029549 0.026439 3 3.056164
RR10 0.000114 0.119751 0.032659 3 3.152524
GRF2 0.000034 0.149300 0.009331 3 3.158665
BIM2 0.000019 0.052877 0.055988 3 3.108884
BNQ3 0.000007 0.200622 0.009331 3 3.209961
RITF1 0.000005 0.055988 0.021773 3 3.077765
AGL94 0.000002 0.045101 0.026439 3 3.071542
COL3 0.000002 0.031104 0.060653 3 3.091760
In [50]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[50]:
col_betweenness_centrality col_out_centrality col_in_centrality centrality_count centrality_sum
AT3G52440 0.930689 0.277778 0.107937 3 4.316403
RR1 0.905741 0.060317 0.109524 3 4.075582
TRB2 0.886169 0.050794 0.111111 3 4.048073
NTT 0.871207 0.233333 0.090476 3 4.195016
NAM 0.861640 0.169841 0.080952 3 4.112433
AT3G25790 0.847483 0.153968 0.079365 3 4.080816
TLP9 0.826371 0.085714 0.123810 3 4.035895
AT2G22200 0.782434 0.049206 0.076190 3 3.907831
SNI1 0.722674 0.298413 0.157143 3 4.178229
MBF1B 0.518089 0.049206 0.068254 3 3.635549
IAA20 0.438956 0.482540 0.455556 3 4.377051
AT3G08505 0.112701 0.063492 0.063492 3 3.239685
BEH2 0.056896 0.026984 0.096825 3 3.180705
GATA5 0.053625 0.071429 0.042857 3 3.167911
AT5G16680 0.042769 0.133333 0.074603 3 3.250705
AT5G65910 0.039905 0.036508 0.050794 3 3.127206
HB23 0.035486 0.020635 0.046032 3 3.102153
MYC3 0.034244 0.014286 0.023810 3 3.072340
ARF10 0.025992 0.153968 0.150794 3 3.330754
BZIP25 0.016731 0.171429 0.101587 3 3.289747
APRR8 0.016146 0.039683 0.033333 3 3.089161
AT4G13040 0.012706 0.042857 0.052381 3 3.107944
FRS8 0.011295 0.004762 0.036508 3 3.052565
SMZ 0.010044 0.046032 0.004762 3 3.060837
EMB2773 0.008643 0.007937 0.047619 3 3.064199
STOP1 0.006970 0.084127 0.088889 3 3.179986
NTM1 0.005771 0.100000 0.076190 3 3.181962
NFL 0.005567 0.026984 0.044444 3 3.076995
JMJ18 0.003250 0.030159 0.066667 3 3.100076
AT5G23405 0.001693 0.138095 0.039683 3 3.179471
GAI 0.001380 0.019048 0.049206 3 3.069634
TRP1 0.001297 0.031746 0.103175 3 3.136218
AT5G12400 0.000510 0.042857 0.036508 3 3.079875
CHR17 0.000404 0.028571 0.036508 3 3.065483
BBX30 0.000338 0.019048 0.017460 3 3.036846
AT2G33550 0.000288 0.014286 0.019048 3 3.033621
SPL14 0.000151 0.039683 0.095238 3 3.135072
IAA10 0.000098 0.034921 0.015873 3 3.050892
MBD9 0.000078 0.038095 0.015873 3 3.054046
AT3G05670 0.000043 0.026984 0.049206 3 3.076233
LUG 0.000030 0.047619 0.041270 3 3.088919
GBF3 0.000025 0.017460 0.073016 3 3.090501
TGA4 0.000023 0.033333 0.107937 3 3.141293
CHR11 0.000023 0.068254 0.044444 3 3.112721
EIN3 0.000020 0.065079 0.066667 3 3.131766
DRIP2 0.000020 0.058730 0.025397 3 3.084147
AT2G44430 0.000015 0.038095 0.053968 3 3.092079
E2F1 0.000015 0.038095 0.014286 3 3.052396
AGL80 0.000010 0.019048 0.025397 3 3.044455
CCA1 0.000010 0.031746 0.044444 3 3.076201
PC-MYB1 0.000003 0.038095 0.063492 3 3.101590
In [51]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[51]:
cor_betweenness_centrality end_betweenness_centrality cor_out_centrality end_out_centrality cor_in_centrality end_in_centrality centrality_count centrality_sum
AT1G05710 0.561959 0.011934 0.419872 0.346614 0.089744 0.155378 6 7.585501
LAF1 0.097803 0.000040 0.794872 0.192231 0.080128 0.123506 6 7.288580
JKD 0.003896 0.017005 0.282051 0.130478 0.266026 0.242032 6 6.941487
MYB122 0.008595 0.059323 0.038462 0.288845 0.035256 0.075697 6 6.506178
AT4G28030 0.001226 0.000986 0.237179 0.039841 0.157051 0.019920 6 6.456204
COL4 0.000144 0.000014 0.137821 0.057769 0.060897 0.062749 6 6.319394
ZFN1 0.000010 0.000007 0.128205 0.037849 0.099359 0.049801 6 6.315231
In [52]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[52]:
cor_betweenness_centrality cor_out_centrality cor_in_centrality centrality_count centrality_sum
SIGF 0.969618 0.083333 0.205128 3 4.258080
HAM3 0.680219 0.121795 0.214744 3 4.016757
LRP1 0.459302 0.214744 0.195513 3 3.869558
tny 0.424819 0.589744 0.083333 3 4.097896
BZS1 0.393705 0.230769 0.096154 3 3.720628
AT2G42660 0.331334 0.182692 0.317308 3 3.831334
EIL1 0.197533 0.076923 0.211538 3 3.485994
RGL3 0.144293 0.246795 0.221154 3 3.612241
AT2G46810 0.091217 0.003205 0.083333 3 3.177756
AT1G72210 0.085673 0.217949 0.301282 3 3.604904
HMG 0.052045 0.064103 0.035256 3 3.151404
SCL27 0.015850 0.022436 0.035256 3 3.073543
HK2 0.008286 0.102564 0.022436 3 3.133286
AT2G38300 0.006142 0.394231 0.217949 3 3.618322
JAZ6 0.006070 0.243590 0.125000 3 3.374660
GLK2 0.003710 0.269231 0.067308 3 3.340249
IDD4 0.003236 0.153846 0.051282 3 3.208364
WRKY69 0.001577 0.272436 0.173077 3 3.447090
RR3 0.000917 0.019231 0.035256 3 3.055404
WRKY13 0.000330 0.019231 0.006410 3 3.025971
ETR2 0.000165 0.051282 0.096154 3 3.147601
AT3G61180 0.000113 0.044872 0.022436 3 3.067421
IDD7 0.000041 0.051282 0.019231 3 3.070554
AT1G68070 0.000041 0.108974 0.051282 3 3.160298
AT2G44410 0.000041 0.012821 0.032051 3 3.044913
AGL67 0.000021 0.009615 0.019231 3 3.028867
ULT1 0.000021 0.035256 0.179487 3 3.214764
In [53]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[53]:
end_betweenness_centrality end_out_centrality end_in_centrality centrality_count centrality_sum
MYB68 7.995992e-01 0.703187 0.425299 3 4.928085
MYB36 2.451639e-01 0.967131 1.000996 3 5.213291
MYB74 1.183968e-01 0.980080 0.934263 3 5.032739
BLJ 7.974682e-02 0.302789 0.343625 3 3.726161
chr31 1.286082e-02 0.158367 0.138446 3 3.309674
AGL42 1.105548e-02 0.174303 0.132470 3 3.317828
WLIM2b 5.602714e-03 0.073705 0.087649 3 3.166957
bZIP58 3.717930e-03 0.176295 0.029880 3 3.209893
LRL2 1.992032e-03 0.071713 0.049801 3 3.123506
SCR 1.579922e-03 0.085657 0.123506 3 3.210743
AGL102 7.725827e-04 0.054781 0.009960 3 3.065514
AT4G36860 6.861884e-04 0.095618 0.085657 3 3.181961
JAZ12 6.494461e-04 0.036853 0.085657 3 3.123159
ABF3 2.045656e-04 0.018924 0.042829 3 3.061958
BZIP17 1.568998e-04 0.014940 0.023904 3 3.039002
AT5G58620 1.320739e-04 0.116534 0.051793 3 3.168459
AT2G47850 3.872844e-05 0.005976 0.013944 3 3.019959
AT2G27580 3.674236e-05 0.027888 0.027888 3 3.055814
AT5G51790 3.177718e-05 0.004980 0.000996 3 3.006008
SAP7 7.944295e-06 0.035857 0.048805 3 3.084669
BIB 6.951258e-06 0.067729 0.060757 3 3.128493
AT2G03470 6.951258e-06 0.027888 0.045817 3 3.073712
MYB32 5.958221e-06 0.041833 0.050797 3 3.092635
ING1 1.986074e-06 0.034861 0.022908 3 3.057771
AT3G18870 1.986074e-06 0.024900 0.013944 3 3.038847
ALY3 9.930368e-07 0.028884 0.029880 3 3.058766
VIP1 9.930368e-07 0.042829 0.036853 3 3.079682
AGL16 9.930368e-07 0.036853 0.035857 3 3.072710
In [54]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[54]:
per_betweenness_centrality pro_betweenness_centrality xyl_betweenness_centrality phl_betweenness_centrality per_out_centrality pro_out_centrality xyl_out_centrality phl_out_centrality per_in_centrality pro_in_centrality xyl_in_centrality phl_in_centrality centrality_count centrality_sum
AT3G43430 0.856656 0.975513 0.737149 0.963097 0.733449 0.325062 0.108889 0.257062 0.419861 0.454094 0.182222 0.079096 12 18.092152
In [55]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[55]:
per_betweenness_centrality per_out_centrality per_in_centrality centrality_count centrality_sum
HDA3 0.980566 0.017422 0.212544 3 4.210531
MGP 0.946777 0.106272 0.170732 3 4.223781
NUC 0.929833 0.271777 0.156794 3 4.358405
IDD14 0.881995 0.162021 0.083624 3 4.127640
GAMMA-H2AX 0.877064 0.026132 0.186411 3 4.089607
GATA15 0.852497 0.045296 0.069686 3 3.967480
AT1G26790 0.809673 0.114983 0.033101 3 3.957756
AT3G21330 0.670601 0.017422 0.121951 3 3.809974
NAC2 0.650452 0.179443 0.026132 3 3.856027
LBD14 0.646205 0.059233 0.153310 3 3.858748
KAN2 0.510289 0.071429 0.047038 3 3.628756
ATL5 0.450240 0.186411 0.029617 3 3.666268
AT4G30180 0.406662 0.024390 0.029617 3 3.460669
AT2G20100 0.340311 0.026132 0.024390 3 3.390834
LBD39 0.284823 0.043554 0.059233 3 3.387611
IDD11 0.256985 0.095819 0.033101 3 3.385905
OFP1 0.188132 0.050523 0.022648 3 3.261303
SOG1 0.163413 0.216028 0.238676 3 3.618117
MYB34 0.134289 0.205575 0.132404 3 3.472268
ZFP7 0.133356 0.043554 0.019164 3 3.196074
ERF3 0.071818 0.024390 0.076655 3 3.172863
AT1G04850 0.058978 0.017422 0.174216 3 3.250616
AT2G39020 0.047239 0.005226 0.020906 3 3.073371
AT2G14880 0.028793 0.022648 0.193380 3 3.244821
ARIA 0.022660 0.027875 0.080139 3 3.130674
NF-YA3 0.019346 0.038328 0.041812 3 3.099486
ERF7 0.008376 0.015679 0.087108 3 3.111164
LBD38 0.007680 0.270035 0.040070 3 3.317785
TLP1 0.006178 0.012195 0.078397 3 3.096770
ERF12 0.004254 0.224739 0.217770 3 3.446762
AT3G03590 0.003694 0.005226 0.048780 3 3.057701
AL5 0.003110 0.020906 0.034843 3 3.058859
ATWHY2 0.003086 0.029617 0.094077 3 3.126779
RAP2.2 0.001289 0.080139 0.081882 3 3.163310
BBX29 0.001076 0.017422 0.015679 3 3.034177
IDD16 0.000815 0.054007 0.013937 3 3.068759
HB21 0.000660 0.346690 0.045296 3 3.392646
AT4G17900 0.000490 0.104530 0.236934 3 3.341953
NF-YA2 0.000316 0.015679 0.013937 3 3.029933
MBD13 0.000109 0.003484 0.013937 3 3.017531
NST1 0.000091 0.019164 0.012195 3 3.031450
AT2G42040 0.000085 0.059233 0.116725 3 3.176043
WRKY21 0.000064 0.081882 0.118467 3 3.200412
MBD5 0.000052 0.001742 0.057491 3 3.059285
GATA16 0.000049 0.045296 0.010453 3 3.055798
PRT1 0.000043 0.128920 0.141115 3 3.270077
NF-YB8 0.000009 0.022648 0.036585 3 3.059243
SHR 0.000006 0.055749 0.148084 3 3.203839
MYB65 0.000006 0.024390 0.031359 3 3.055755
CDF2 0.000003 0.078397 0.055749 3 3.134149
In [56]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[56]:
pro_betweenness_centrality pro_out_centrality pro_in_centrality centrality_count centrality_sum
HB18 0.924058 0.089330 0.004963 3 4.018351
AT1G51200 0.888708 0.091811 0.188586 3 4.169105
MYB6 0.303168 0.049628 0.022333 3 3.375128
AT4G17780 0.086818 0.002481 0.029777 3 3.119076
IAA9 0.053017 0.578164 0.421836 3 4.053017
AT1G75490 0.006487 0.007444 0.009926 3 3.023857
HAT9 0.002913 0.002481 0.022333 3 3.027727
STO 0.002481 0.183623 0.124069 3 3.310174
TAFII15 0.002284 0.027295 0.062035 3 3.091614
AT2G40200 0.001228 0.042184 0.027295 3 3.070707
GRP2 0.000031 0.099256 0.186104 3 3.285391
SPL1 0.000012 0.066998 0.114144 3 3.181154
AT1G19000 0.000006 0.037221 0.074442 3 3.111669
In [57]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[57]:
xyl_betweenness_centrality xyl_out_centrality xyl_in_centrality centrality_count centrality_sum
AT1G68200 0.987355 0.522222 0.284444 3 4.794021
VND6 0.982910 0.340000 0.146667 3 4.469577
MYB85 0.978411 0.202222 0.233333 3 4.413967
LBD31 0.969220 0.297778 0.228889 3 4.495887
MYB83 0.967330 0.768889 0.233333 3 4.969552
VND7 0.953784 0.742222 0.153333 3 4.849339
MYB99 0.953571 0.208889 0.164444 3 4.326904
VND1 0.947642 0.344444 0.131111 3 4.423197
AT4G16610 0.941232 0.153333 0.126667 3 4.221232
FBH1 0.941153 0.191111 0.091111 3 4.223375
AT3G10470 0.940470 0.095556 0.097778 3 4.133804
HB31 0.932591 0.293333 0.235556 3 4.461480
XND1 0.893719 0.404444 0.168889 3 4.467053
AT3G22560 0.879713 0.044444 0.097778 3 4.021935
VND5 0.856936 0.657778 0.046667 3 4.561381
MYB46 0.855734 0.864444 0.468889 3 5.189067
IAA6 0.855214 0.524444 0.060000 3 4.439659
VND4 0.728755 0.751111 0.295556 3 4.775422
MYB52 0.671829 0.288889 0.177778 3 4.138495
AT2G04845 0.656407 0.115556 0.155556 3 3.927518
ZHD3 0.631235 0.457778 0.291111 3 4.380124
VND3 0.618471 0.717778 0.277778 3 4.614026
ARR9 0.602460 0.017778 0.028889 3 3.649126
AT3G22100 0.599668 0.028889 0.037778 3 3.666335
AT1G66810 0.342371 0.600000 0.377778 3 4.320148
OFP10 0.217961 0.168889 0.011111 3 3.397961
TCP20 0.188721 0.055556 0.251111 3 3.495387
VND2 0.157496 0.844444 0.457778 3 4.459718
MMD1 0.114437 0.026667 0.002222 3 3.143326
SHP1 0.091725 0.160000 0.046667 3 3.298391
AT5G25470 0.060802 0.013333 0.020000 3 3.094135
AT1G26590 0.050012 0.026667 0.053333 3 3.130012
BZIP49 0.049567 0.115556 0.086667 3 3.251789
ABF4 0.033224 0.042222 0.015556 3 3.091002
AT3G19080 0.031081 0.042222 0.006667 3 3.079970
SHY2 0.015575 0.006667 0.028889 3 3.051131
HB34 0.014843 0.131111 0.053333 3 3.199287
ASL9 0.005009 0.077778 0.040000 3 3.122786
TCP10 0.004425 0.002222 0.013333 3 3.019980
PLIM2b 0.002514 0.173333 0.186667 3 3.362514
GIF3 0.002301 0.168889 0.071111 3 3.242301
MYB25 0.001930 0.062222 0.086667 3 3.150819
AT3G10760 0.001915 0.108889 0.084444 3 3.195249
AT5G04390 0.000678 0.033333 0.026667 3 3.060678
AT5G46910 0.000129 0.186667 0.100000 3 3.286795
AP3 0.000084 0.091111 0.100000 3 3.191195
GATA1 0.000020 0.008889 0.020000 3 3.028909
In [58]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[58]:
phl_betweenness_centrality phl_out_centrality phl_in_centrality centrality_count centrality_sum
AT4G37180 0.969452 0.511299 0.122881 3 4.603633
HCA2 0.799051 0.230226 0.183616 3 4.212893
DOF2.4 0.741457 0.129944 0.121469 3 3.992870
APL 0.642254 0.985876 1.001412 3 5.629542
AT5G02460 0.545010 0.028249 0.170904 3 3.744162
AT5G41380 0.372484 0.557910 0.060734 3 3.991128
NAC057 0.328938 0.331921 0.166667 3 3.827526
BHLH101 0.285558 0.045198 0.052260 3 3.383016
AS1 0.271196 0.019774 0.038136 3 3.329106
AT3G12730 0.188177 0.844633 0.604520 3 4.637329
NF-YB3 0.177087 0.001412 0.011299 3 3.189799
AT2G03500 0.105529 0.615819 0.560734 3 4.282082
VOZ1 0.054274 0.175141 0.001412 3 3.230827
AT1G72010 0.047417 0.029661 0.073446 3 3.150525
AT2G31370 0.033317 0.032486 0.066384 3 3.132187
CRF10 0.004877 0.001412 0.005650 3 3.011939
NAC020 0.004237 0.079096 0.153955 3 3.237288
AT5G09240 0.002977 0.014124 0.049435 3 3.066536
AT1G49560 0.001420 0.104520 0.132768 3 3.238709
SOL1 0.000981 0.050847 0.036723 3 3.088552
AT1G64530 0.000957 0.066384 0.022599 3 3.089940
AT1G72740 0.000503 0.036723 0.046610 3 3.083837
AT5G12850 0.000438 0.015537 0.057910 3 3.073884
SYD 0.000066 0.018362 0.021186 3 3.039614
AT5G09460 0.000042 0.015537 0.022599 3 3.038178
RSZ22a 0.000024 0.009887 0.060734 3 3.070645
AT1G58220 0.000012 0.009887 0.062147 3 3.072046
CRF1 0.000006 0.028249 0.076271 3 3.104526
MBF1A 0.000004 0.042373 0.045198 3 3.087575
bHLH104 0.000004 0.039548 0.062147 3 3.101699
NAC045 0.000004 0.031073 0.040960 3 3.072038
AT5G16470 0.000002 0.060734 0.091808 3 3.152544
HMGB1 0.000002 0.053672 0.070621 3 3.124296

Search for individual genes¶

In [59]:
gene = 'SHR'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[59]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality per_degree_centrality per_out_centrality per_in_centrality per_betweenness_centrality per_closeness_centrality per_eigenvector_centrality pro_degree_centrality pro_out_centrality pro_in_centrality pro_closeness_centrality pro_eigenvector_centrality
SHR 1.0 0.007968 0.002988 0.00498 0.000243 0.006429 0.203833 0.055749 0.148084 0.000006 0.000635 0.053931 0.052109 0.027295 0.024814 0.000196 0.026289
In [60]:
gene = 'BLJ'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[60]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
BLJ 1.0 0.646414 0.302789 0.343625 0.079747 0.00042 0.124605
In [61]:
gene = 'JKD'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[61]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
JKD 2.0 0.548077 0.282051 0.266026 0.003896 0.000308 0.119496 0.37251 0.130478 0.242032 0.017005 0.000413 0.100787
In [62]:
gene = 'RVN'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[62]:
end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality
RVN 0.065737 0.034861 0.030876 0.00035 0.033994
In [63]:
gene = 'BIB'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[63]:
tf_occurance end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality
BIB 1.0 0.128486 0.067729 0.060757 0.000007 0.000372 0.05411
In [64]:
gene = 'IME'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[64]:
In [65]:
gene = 'MYB66'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[65]:
In [66]:
gene = 'GL2'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[66]:
tf_occurance atri_degree_centrality atri_out_centrality atri_in_centrality atri_betweenness_centrality atri_closeness_centrality atri_eigenvector_centrality lrc_degree_centrality lrc_out_centrality lrc_in_centrality lrc_closeness_centrality lrc_eigenvector_centrality
GL2 1.0 0.376147 0.149847 0.2263 0.760577 0.000623 0.090427 0.007776 0.006221 0.001555 0.000657 0.006107
In [67]:
tf_spec.to_csv('TF_GRN_centrality_t5-t7_zscore3.csv', index=True)
In [ ]: